# Global chunk defaults for the knitted report: show the code of every chunk,
# but keep package messages and warnings out of the rendered output.
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE
)
# loading necessary packages (library() attaches them; it does not install)
library(here)
library(tidyverse)
library(janitor)
library(kableExtra)
library(DT)
library(knitr)
library(lubridate)
library(plotly)

Reading in the urchins data and converting the column names to lower snake case

# Read data/urchins.csv (path resolved relative to the project root by here())
# and standardise the column names with janitor::clean_names().
urchins <- clean_names(
  read_csv(here("data", "urchins.csv"))
)

Section 1: Purple and Red Urchin Size Comparisons

Includes data from all sites and for both treatments

# Keep only the two columns the size-comparison histogram needs.
# No group_by() here: the plot maps `common_name` directly, and grouping by
# (common_name, size) would only leave a grouped tibble behind for later code.
urchins_size_comp <- urchins %>%
  select(size, common_name)

Data Visualization of Red and Purple Urchin Sizes in cm

# Caption for Figure 1. It is pasted together from short pieces and then
# re-wrapped with str_wrap(), so the indentation of this source file never
# ends up inside the rendered caption.
fig1_caption <- str_wrap(
  paste(
    "Figure 1: Size of Purple and Red Urchin measured by diameters in cm:",
    "Count of Purple and Red Urchins by measuring diameter of each in",
    "centimeters to the nearest half centimeter to show relationship between",
    "type of urchin and size. Purple Urchins tend to have more individuals",
    "between 4 and 6 cm, while Red Urchins have more normally distributed",
    "sizes with less of a distinct trend towards one size value."
  ),
  width = 70
)

# Histogram of urchin diameter, drawn from urchins_size_comp with one facet
# per urchin type.
ggplot(data = urchins_size_comp, aes(x = size)) +
  # 12 bins, bars filled by urchin type; `color = 4` sets the bar outline
  # colour (a numeric colour is looked up in the active base palette).
  geom_histogram(aes(fill = common_name), bins = 12, color = 4) +
  # one panel per urchin type
  facet_wrap(~ common_name) +
  labs(
    y = "Count of Urchins in Sample",
    x = "Size of Urchin Diameter (cm) to nearest 0.5 cm",
    title = "Size of Purple and Red Urchin diameter (cm)",
    caption = fig1_caption
  ) +
  # left-align the caption under the plot
  theme(plot.caption = element_text(hjust = 0)) +
  # replace the default legend title (the column name) with a readable one
  guides(fill = guide_legend(title = "Type of Urchin"))

Adding descriptive statistics table for mean, median, standard deviation, and sample size for urchins data

# Per-species summary of urchin size: mean, median and standard deviation
# (each reported to 3 significant figures) plus the number of observations.
urchins_stats <- summarize(
  group_by(urchins, common_name),
  mean = signif(mean(size), digits = 3),
  median = signif(median(size), digits = 3),
  stdev = signif(sd(size), digits = 3),
  sample_size = n()
)

  # Render urchins_stats as a captioned table with readable column headers and
  # bordered / striped / hover bootstrap styling.
  # NOTE(review): kableExtra documents `stripe_color` as a LaTeX-only option --
  # confirm it has any effect for the output format used here.
  urchins_stats %>%
    kable(
      caption = "Table 1: Purple and Red Urchin Statistics: Depiction of important statistics for population samples of Red and Purple Urchins along the Santa Barbara coast.",
      col.names = c(
        "Common Name",
        "Mean Size (cm)",
        "Median Size (cm)",
        "Standard Deviation (cm)",
        "Sample Size (total number of urchins)"
      )
    ) %>%
    kable_styling(
      bootstrap_options = c("bordered", "striped", "hover"),
      stripe_color = "darkolivegreen3"
    )
Table 1: Purple and Red Urchin Statistics: Depiction of important statistics for population samples of Red and Purple Urchins along the Santa Barbara coast.
Common Name Mean Size (cm) Median Size (cm) Standard Deviation (cm) Sample Size (total number of urchins)
Purple Urchin 4.47 4.5 1.18 31805
Red Urchin 6.78 7.0 1.89 27408

Reflection and Takeaways from Urchin Data

From the data, it is clear that Red Urchins are considerably larger than Purple Urchins, as best seen by comparing the mean and median sizes: Red Urchins had a mean of 6.78 cm (median 7.0 cm), while Purple Urchins had a mean of 4.47 cm (median 4.5 cm). However, it is important to note that Red Urchins also had a larger standard deviation (1.89 cm vs. 1.18 cm), meaning their sizes are more spread out around the mean, while Purple Urchin sizes are more tightly clustered around their mean.

Section 2: Explore Purple Urchin Counts by Site and Year

# Purple urchin records only, with a parsed date, the observation year, and
# the full reef name that corresponds to each site code.
purple_urchins <- urchins %>%
  filter(common_name %in% "Purple Urchin") %>%
  mutate(
    # parse the date column as month-day-year
    date = mdy(date),
    # year is taken from the date parsed on the line above
    year = year(date),
    # map each site code to its full name; codes not listed here become NA
    site_full = case_when(
      site %in% "NAPL" ~ "Naples Reef",
      site %in% "AQUE" ~ "Arroyo Quemado Reef",
      site %in% "CARP" ~ "Carpinteria Reef",
      site %in% "MOHK" ~ "Mohawk Reef",
      site %in% "IVEE" ~ "Isla Vista Reef"
    )
  )

Creating a new dataset with total number of purple urchins observed by site and year

# Number of purple urchin records per site and year.
# count() tallies rows for each (site, year) pair in one step and returns an
# ungrouped tibble with columns site, year and total_count, replacing the
# earlier group_by() + select() + mutate(n()) + distinct() chain, which
# produced the same table but left it grouped.
purple_urchins_filtered <- purple_urchins %>%
  count(site, year, name = "total_count")

Making a finalized graph of counts of purple urchins recorded at each site over time

# Caption for Figure 2. It is pasted together from short pieces and then
# re-wrapped with str_wrap(), so the indentation of this source file never
# ends up inside the rendered caption.
fig2_caption <- str_wrap(
  paste(
    "Figure 2: Counts of Purple Urchins at Each Site over Time: Purple Urchin",
    "data collection at various sites around Santa Barbara over time, showing",
    "which sites had the most samples collected."
  ),
  width = 70
)

# Line graph of the yearly purple urchin count, one coloured line per site.
purple_urchins_graph <- ggplot(
  data = purple_urchins_filtered,
  aes(x = year, y = total_count)
) +
  geom_line(aes(color = site)) +
  labs(
    x = "Year",
    y = " Total Count",
    # single-line title: no embedded line break or indentation whitespace
    title = "Counts of Purple Urchins at Each Site over Time",
    caption = fig2_caption,
    color = "Sample Site" # legend title for the line colours
  ) +
  # left-align both the caption and the title
  theme(
    plot.caption = element_text(hjust = 0),
    plot.title = element_text(hjust = 0)
  )

# Show the static ggplot version of the graph.
purple_urchins_graph

# Also show an interactive plotly version of the same graph for readability.
ggplotly(purple_urchins_graph)

Reflection and Answering Questions about Graph:

This graph probably does not reflect changes in the purple urchin population over time. For the counts to reflect a real decrease in the purple urchin population, we would have to take into account a number of additional factors, such as the time spent sampling at each site, growth pressures at each site, food sources at each site, and more. The observed changes are likely a result of where samples were taken within each site, how much time was spent at each site, and inconsistent collection practices.

END TASK